{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv('raw-data/B_train.csv', index_col='no')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train = train.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0    357\n",
       "0.0     14\n",
       "Name: UserInfo_203, dtype: int64"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp = train[['UserInfo_203', 'flag']]\n",
    "temp = temp.dropna()\n",
    "temp.UserInfo_203.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# temp.UserInfo_27 = np.log(temp.UserInfo_27 + 1)\n",
    "# temp.ProductInfo_47 = (temp.UserInfo_27 - temp.UserInfo_27.mean()) /  temp.UserInfo_27.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "Could not interpret input 'UserInfo_27'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-116-6157b2b8b8fc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0msns\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mviolinplot\u001b[0m\u001b[0;34m(\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'flag'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'UserInfo_27'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/seaborn/categorical.py\u001b[0m in \u001b[0;36mviolinplot\u001b[0;34m(x, y, hue, data, order, hue_order, bw, cut, scale, scale_hue, gridsize, width, inner, split, orient, linewidth, color, palette, saturation, ax, **kwargs)\u001b[0m\n\u001b[1;32m   2345\u001b[0m                              \u001b[0mbw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscale\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscale_hue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgridsize\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2346\u001b[0m                              \u001b[0mwidth\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minner\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msplit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlinewidth\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2347\u001b[0;31m                              color, palette, saturation)\n\u001b[0m\u001b[1;32m   2348\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2349\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0max\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/seaborn/categorical.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, x, y, hue, data, order, hue_order, bw, cut, scale, scale_hue, gridsize, width, inner, split, orient, linewidth, color, palette, saturation)\u001b[0m\n\u001b[1;32m    539\u001b[0m                  color, palette, saturation):\n\u001b[1;32m    540\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 541\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestablish_variables\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhue_order\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    542\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestablish_colors\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpalette\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msaturation\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    543\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mestimate_densities\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcut\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscale\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscale_hue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgridsize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/home/shan/anaconda3/lib/python3.5/site-packages/seaborn/categorical.py\u001b[0m in \u001b[0;36mestablish_variables\u001b[0;34m(self, x, y, hue, data, orient, order, hue_order, units)\u001b[0m\n\u001b[1;32m    149\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstring_types\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    150\u001b[0m                     \u001b[0merr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Could not interpret input '{}'\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    153\u001b[0m             \u001b[0;31m# Figure out the plotting orientation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mValueError\u001b[0m: Could not interpret input 'UserInfo_27'"
     ]
    }
   ],
   "source": [
    "sns.violinplot( x='flag', y='UserInfo_27', data=temp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "UserInfo_203  flag\n",
       "0.0           0        12\n",
       "              1         2\n",
       "1.0           0       344\n",
       "              1        13\n",
       "dtype: int64"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "temp.groupby(['UserInfo_203', 'flag']).apply(lambda x: len(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
